library(tidyverse)
## -- Attaching packages ----------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts -------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(viridis)
## Loading required package: viridisLite
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(readr)
library(p8105.datasets)
# Load the Instacart orders data shipped with p8105.datasets.
data("instacart")

# Standardize the column names and keep the result. Without the assignment the
# cleaned tibble is only printed and then discarded, so later code would still
# see the uncleaned object.
instacart <- instacart %>%
  janitor::clean_names()

# Print the cleaned tibble for a quick look at its columns.
instacart
## # A tibble: 1,384,617 x 15
## order_id product_id add_to_cart_ord~ reordered user_id eval_set
## <int> <int> <int> <int> <int> <chr>
## 1 1 49302 1 1 112108 train
## 2 1 11109 2 1 112108 train
## 3 1 10246 3 0 112108 train
## 4 1 49683 4 0 112108 train
## 5 1 43633 5 1 112108 train
## 6 1 13176 6 0 112108 train
## 7 1 47209 7 0 112108 train
## 8 1 22035 8 1 112108 train
## 9 36 39612 1 0 79431 train
## 10 36 19660 2 1 79431 train
## # ... with 1,384,607 more rows, and 9 more variables: order_number <int>,
## # order_dow <int>, order_hour_of_day <int>,
## # days_since_prior_order <int>, product_name <chr>, aisle_id <int>,
## # department_id <int>, aisle <chr>, department <chr>
# Box plots of the hour of day at which items were ordered, one box per aisle,
# restricted to aisles whose name contains "frozen" or "fresh ". Aisles are
# ordered by their median order hour.
instacart %>%
  filter(str_detect(string = aisle, pattern = "frozen|fresh ")) %>%
  mutate(
    aisle = fct_reorder(.f = aisle, .x = order_hour_of_day, .fun = median)
  ) %>%
  plot_ly(
    y = ~order_hour_of_day,
    color = ~aisle,
    type = "box",
    colors = "Set2"
  )
# Box plots of order hour for the "fresh vegetables" aisle, one box per day of
# the week. The day code is converted to text so it can become a factor, and
# the days are then ordered by their median order hour.
instacart %>%
  filter(aisle == "fresh vegetables") %>%
  mutate(order_dow = as.character(order_dow)) %>%
  mutate(
    order_dow = fct_reorder(
      .f = order_dow,
      .x = order_hour_of_day,
      .fun = median
    )
  ) %>%
  plot_ly(
    y = ~order_hour_of_day,
    color = ~order_dow,
    type = "box",
    colors = "Set2"
  )
# Names of the ten aisles with the most ordered items (ties at the cut-off are
# kept, so more than ten rows are possible).
top_aisles <- instacart %>%
  count(aisle, sort = TRUE) %>%
  # Rank explicitly by the count column instead of relying on top_n()'s
  # "use the last column" default, which also emits a "Selecting by n" message.
  top_n(10, n) %>%
  select(aisle)

# Box plots of order hour of day for the top aisles, ordered by median hour.
# semi_join() keeps the rows of instacart whose aisle appears in top_aisles
# without adding columns, which is all the join is used for here.
instacart %>%
  semi_join(top_aisles, by = "aisle") %>%
  mutate(aisle = fct_reorder(aisle, order_hour_of_day)) %>%
  plot_ly(
    y = ~order_hour_of_day,
    color = ~aisle,
    type = "box",
    colors = "Set2"
  )
# Bar chart of the number of items ordered from each of the ten most popular
# aisles, with bars sorted by count.
instacart %>%
  count(aisle, sort = TRUE) %>%
  # Rank explicitly by the count column rather than the implicit last column.
  top_n(10, n) %>%
  mutate(aisle = fct_reorder(aisle, n)) %>%
  # Name the palette explicitly, as the other plots in this script do. With no
  # `colors` argument plotly asks RColorBrewer for one "Set2" colour per aisle
  # and warns, because that palette only defines eight colours.
  plot_ly(
    x = ~aisle,
    y = ~n,
    color = ~aisle,
    type = "bar",
    colors = "Set2"
  )
# Line chart of the mean order hour for each day of the week, one line per
# aisle, restricted to aisles whose name contains "fresh ".
instacart %>%
  filter(str_detect(aisle, "fresh ")) %>%
  group_by(aisle, order_dow) %>%
  # Collapse to one row per aisle and day. Using mutate() here would keep every
  # order row, so each line would be drawn through a huge number of duplicate
  # points.
  summarize(mean_hour = mean(order_hour_of_day)) %>%
  # Drop the remaining grouping so the plot receives a plain tibble.
  ungroup() %>%
  # "lines" is the valid scatter mode for connected lines ("line" is not).
  plot_ly(
    x = ~order_dow,
    y = ~mean_hour,
    color = ~aisle,
    type = "scatter",
    mode = "lines"
  )